import functools
import json
import os
import re

import pandas as pd
from openai import OpenAI
from tqdm import tqdm

# DeepSeek completion with prefix + suffix via the beta endpoint (model: deepseek-chat)
@functools.lru_cache(maxsize=None)
def _get_client():
    """Build the DeepSeek API client once and reuse it for every request.

    The key is read from the ``DEEPSEEK_API_KEY`` environment variable; when
    the variable is unset an empty string is passed, exactly as before.
    """
    return OpenAI(
        api_key=os.environ.get("DEEPSEEK_API_KEY", ""),
        base_url="https://api.deepseek.com/beta",
    )


def deepseekchat(prefix, suffix):
    """Request a completion that continues ``prefix`` and leads into ``suffix``.

    Args:
        prefix: Text sent as the completion ``prompt``.
        suffix: Text sent as the completion ``suffix``.

    Returns:
        The ``text`` of the first choice in the API response.

    Raises:
        Whatever the OpenAI SDK raises for network or API errors; nothing is
        caught here.
    """
    # Reusing one client avoids setting up a fresh HTTP connection pool for
    # every record processed by the caller.
    response = _get_client().completions.create(
        model="deepseek-chat",
        prompt=prefix,
        suffix=suffix,
        max_tokens=4000,
    )
    return response.choices[0].text

# Matches a Markdown code-fence marker, optionally followed by one of the
# language tags this script expects. Only the fence itself is removed, so
# identifiers that merely contain "java", "script" or "python" stay intact.
# "javascript" is listed before "java" so the longer tag wins.
_FENCE_RE = re.compile(r'```(?:javascript|java|python)?')

# Read all non-blank JSONL records up front: tqdm can then show a total, and
# the input handle is closed before the slow API calls begin.
with open('Q_B_without_answer.jsonl', 'r', encoding='utf-8') as data:
    records = [line for line in data if line.strip()]

save_list = []  # one single-element list per record: [[completion], ...]
count_len = []  # character length of each cleaned completion

with open('result_deepseek2.json', 'w', encoding='utf-8') as save_result:
    for line in tqdm(records):
        item = json.loads(line)
        content = deepseekchat(item['prefix'], item['fim_suffix'])
        content = _FENCE_RE.sub('', content)
        print(content)
        save_list.append([content])
        count_len.append(len(content))

    # describe() raises on an empty frame, so only report when there is data.
    if count_len:
        print(pd.DataFrame(count_len).describe(percentiles=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99]))

    str_code = json.dumps(save_list, ensure_ascii=False)
    save_result.write(str_code)
